<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <!-- Character encoding: declared exactly once, as the first element of the
         head, so the parser meets it within the first 1024 bytes. -->
    <meta charset="utf-8" />
    <meta content="Cask Data, Inc." name="author" />
    <meta content="Copyright © 2014 Cask Data, Inc." name="copyright" />

    <!-- Git release, hash and timestamp metadata recorded for this page. -->
    <meta name="git_release" content="6.1.1">
    <meta name="git_hash" content="05fbac36f9f7aadeb44f5728cea35136dbc243e5">
    <meta name="git_timestamp" content="2020-02-09 08:22:47 +0800">
    <title>System and Custom Datasets</title>

    <!-- Stylesheets. cdap-bootstrap.css is linked once, last, so that at equal
         specificity its rules win over Bootstrap and the plugin styles. -->
    <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
    <link rel="stylesheet" href="../../_static/bootstrap-3.3.6/css/bootstrap.min.css" type="text/css" />
    <link rel="stylesheet" href="../../_static/bootstrap-3.3.6/css/bootstrap-theme.min.css" type="text/css" />
    <link rel="stylesheet" href="../../_static/css/bootstrap-sphinx.css" type="text/css" />
    <link rel="stylesheet" href="../../_static/css/cdap-dynamicscrollspy-4.css" type="text/css" />
    <link rel="stylesheet" href="../../_static/css/jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../../_static/css/cdap-jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../../_static/css/abixTreeList-2.css" type="text/css" />
    <link rel="stylesheet" href="../../_static/cdap-bootstrap.css" type="text/css" />

    <!-- Global options object, defined before the scripts below are loaded;
         presumably read by doctools.js (confirm against that script). -->
    <script type="text/javascript">
      var DOCUMENTATION_OPTIONS = {
        URL_ROOT:    '',
        VERSION:     '6.1.1',
        COLLAPSE_INDEX: false,
        FILE_SUFFIX: '.html',
        HAS_SOURCE:  false
      };
    </script>
    <script type="text/javascript" src="../../_static/jquery.js"></script>
    <script type="text/javascript" src="../../_static/underscore.js"></script>
    <script type="text/javascript" src="../../_static/doctools.js"></script>
    <script type="text/javascript" src="../../_static/language_data.js"></script>

    <!-- Document relationships: icon, index/search pages and prev/next/up pages. -->
    <link rel="shortcut icon" href="../../_static/favicon.ico"/>
    <link rel="index" title="Index" href="../../genindex.html" />
    <link rel="search" title="Search" href="../../search.html" />
    <link rel="top" title="Cask Data Application Platform 6.1.1 Documentation" href="../../index.html" />
    <link rel="up" title="Datasets" href="index.html" />
    <link rel="next" title="Dataset Permissions" href="permissions.html" />
    <link rel="prev" title="TimePartitioned FileSet" href="time-partitioned-fileset.html" />
    <!-- block extrahead -->
    <!-- The viewport deliberately sets no maximum-scale, so users can still zoom. -->
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="apple-mobile-web-app-capable" content="yes">
    <!-- block extrahead end -->

  </head>
<body role="document">

<!-- block navbar -->
<div id="navbar" class="navbar navbar-inverse navbar-default navbar-fixed-top">
    <div class="container-fluid">
      <div class="row">
        <div class="navbar-header">
          <!-- Brand logo; links to the documentation home page. -->
          <a class="navbar-brand" href="../../table-of-contents/../../index.html">
            <span><img alt="CDAP logo" src="../../_static/cdap_logo.svg"/></span>
          </a>

          <!-- The .navbar-toggle button targets the .nav-collapse list below
               (data-toggle="collapse"). It shows only icon bars, so aria-label
               supplies its accessible name. -->
          <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".nav-collapse" aria-label="Toggle navigation">
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
          </button>

          <!-- Version switcher: current version label plus links to other releases. -->
          <div class="pull-right">
            <div class="dropdown version-dropdown">
              <a href="#" class="dropdown-toggle" data-toggle="dropdown"
                role="button" aria-haspopup="true" aria-expanded="false">
                v 6.1.1 <span class="caret"></span>
              </a>
              <ul class="dropdown-menu">
                <li><a href="//docs.cdap.io/cdap/5.1.2/en/index.html">v 5.1.2</a></li>
                <li><a href="//docs.cdap.io/cdap/4.3.4/en/index.html">v 4.3.4</a></li>
              </ul>
            </div>
          </div>
          <!-- Search form; submits the query as "q" to search.html via GET.
               The placeholder is not a label, so aria-label names the field. -->
          <form class="navbar-form navbar-right navbar-search" action="../../search.html" method="get">
            <div class="form-group">
              <div class="navbar-search-image material-icons"></div>
              <input type="text" name="q" class="form-control" placeholder="  Search" aria-label="Search" />
            </div>
            <input type="hidden" name="check_keywords" value="yes" />
            <input type="hidden" name="area" value="default" />
          </form>

          <!-- Top-level site tabs; the tab carrying class "current" marks the active manual. -->
          <div class="collapse navbar-collapse nav-collapse navbar-right navbar-navigation">
            <ul class="nav navbar-nav"><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../../table-of-contents/../../index.html">简介</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link current" href="../../table-of-contents/../../guides.html">手册</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../../table-of-contents/../../reference-manual/index.html">参考</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../../table-of-contents/../../faqs/index.html">帮助</a></li>
            </ul>
          </div>

        </div>
      </div>
    </div>
  </div><!-- block navbar end -->
<!-- block main content -->
<div class="main-container container">
  <div class="row"><div class="col-md-2">
      <div id="sidebar" class="bs-sidenav scrollable-y-outside" role="complementary">
<!-- theme_manual: developer-manual -->
<!-- theme_manual_highlight: guides -->
<!-- sidebar_title_link: ../../table-of-contents/../../guides.html -->

  <div role="note" aria-label="manuals links"><h3><a href="../../table-of-contents/../../guides.html">Guides</a></h3>

    <ul class="this-page-menu">
      <li class="toctree-l1"><a href="../../table-of-contents/../../user-guide/index.html" rel="nofollow">用户手册</a>
      </li>
      <li class="toctree-l1"><b><a href="../../table-of-contents/../../developer-manual/index.html" rel="nofollow">开发手册</a></b>
      <nav class="pagenav">
      <ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../../index.html"> 简介</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../getting-started/index.html"> 入门指南</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../getting-started/sandbox/index.html">CDAP Sandbox</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../getting-started/sandbox/zip.html">二进制 Zip 文件</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../getting-started/sandbox/zip.html#cdap-sandbox">启动和停止 CDAP Sandbox</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../getting-started/sandbox/virtual-machine.html">虚拟机镜像</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../getting-started/sandbox/docker.html">Docker 镜像</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../getting-started/quick-start.html">快速入门</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../getting-started/dev-env.html">搭建开发环境</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../getting-started/start-stop-cdap.html">启动和停止 CDAP</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../getting-started/building-apps.html">构建并运行应用</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../overview/index.html"> 概述</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../overview/anatomy.html"> 大数据应用剖析</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../overview/modes.html"> 模式和组件</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../overview/abstractions.html"> 核心概念</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../overview/interfaces.html"> 编程接口</a></li>
</ul>
</li>
<li class="toctree-l1 current"><a class="reference internal" href="../index.html"> 抽象概念</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="../core.html"> Core Abstractions</a></li>
<li class="toctree-l2"><a class="reference internal" href="../applications.html"> Applications</a></li>
<li class="toctree-l2 current"><a class="reference internal" href="index.html"> Datasets</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="overview.html"> Overview</a></li>
<li class="toctree-l3"><a class="reference internal" href="table.html"> Table API</a></li>
<li class="toctree-l3"><a class="reference internal" href="fileset.html"> FileSets</a></li>
<li class="toctree-l3"><a class="reference internal" href="partitioned-fileset.html"> Partitioned FileSets</a></li>
<li class="toctree-l3"><a class="reference internal" href="time-partitioned-fileset.html"> TimePartitioned FileSets</a></li>
<li class="toctree-l3 current"><a class="current reference internal" href="#"> System and Custom Datasets</a></li>
<li class="toctree-l3"><a class="reference internal" href="permissions.html"> Dataset Permissions</a></li>
<li class="toctree-l3"><a class="reference internal" href="cube.html"> Cube Dataset</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../mapreduce-programs.html"> MapReduce Programs</a></li>
<li class="toctree-l2"><a class="reference internal" href="../plugins.html"> Plugins</a></li>
<li class="toctree-l2"><a class="reference internal" href="../schedules.html"> Schedules</a></li>
<li class="toctree-l2"><a class="reference internal" href="../secure-keys.html"> Secure Keys</a></li>
<li class="toctree-l2"><a class="reference internal" href="../services.html"> Services</a></li>
<li class="toctree-l2"><a class="reference internal" href="../spark-programs.html"> Spark Programs</a></li>
<li class="toctree-l2"><a class="reference internal" href="../workers.html"> Workers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../workflows.html"> Workflows</a></li>
<li class="toctree-l2"><a class="reference internal" href="../artifacts.html"> Artifacts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../program-lifecycle.html"> Program Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="../namespaces.html"> Namespaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../transaction-system.html"> Transaction System</a></li>
<li class="toctree-l2"><a class="reference internal" href="../transactional-messaging-system.html"> Transactional Messaging System</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../metadata/index.html"> 元数据</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../metadata/system-metadata.html"> System Metadata</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../metadata/discovery-lineage.html"> Discovery and Lineage</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../metadata/field-lineage.html"> Field Level Lineage</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../metadata/audit-logging.html"> Audit Logging</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../metadata/metadata-ui.html"> CDAP Metadata UI</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../metadata/programmatic-metadata.html"> Accessing metadata programmatically</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../pipelines/index.html"> 数据流管道</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../pipelines/concepts-design.html"> Concepts and Design</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../pipelines/getting-started.html"> Getting Started</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../pipelines/studio.html"> CDAP Studio</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../pipelines/creating-pipelines.html"> Creating Pipelines</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../pipelines/running-pipelines.html"> Running Pipelines</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../pipelines/plugin-management.html"> Plugin Management</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../pipelines/plugins/index.html"> Plugin Reference</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../pipelines/plugins/actions/index.html"> Action Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../pipelines/plugins/sources/index.html"> Source Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../pipelines/plugins/transforms/index.html"> Transform Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../pipelines/plugins/analytics/index.html"> Analytic Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../pipelines/plugins/sinks/index.html"> Sink Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../pipelines/plugins/shared-plugins/index.html"> Shared Plugins</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../pipelines/plugins/shared-plugins/core.html">CoreValidator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../pipelines/plugins/post-run-plugins/index.html"> Post-run Plugins</a><ul class="simple">
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../pipelines/developing-pipelines.html"> Developing Pipelines</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../pipelines/developing-plugins/index.html"> Developing Plugins</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../pipelines/developing-plugins/plugin-basics.html">Plugin Basics</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../pipelines/developing-plugins/creating-a-plugin.html">Creating a Plugin</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../pipelines/developing-plugins/presentation-plugins.html">Plugin Presentation</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../pipelines/developing-plugins/testing-plugins.html">Testing Plugins</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../pipelines/developing-plugins/packaging-plugins.html">Packaging Plugins</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../pipelines/how-cdap-pipelines-work.html"> How CDAP Pipelines Work</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../cloud-runtimes/index.html"> 云平台运行</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../cloud-runtimes/concepts/index.html"> Concepts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../cloud-runtimes/provisioners/index.html"> Provisioners</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../cloud-runtimes/provisioners/gcp-dataproc.html">Google Dataproc</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../cloud-runtimes/provisioners/aws-emr.html">Amazon Elastic MapReduce</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../cloud-runtimes/provisioners/remote-hadoop.html">Remote Hadoop</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../cloud-runtimes/profiles/index.html"> Profiles</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../cloud-runtimes/profiles/creating-profiles.html">Creating Profiles</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../cloud-runtimes/profiles/assigning-profiles.html">Assigning Profiles</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../cloud-runtimes/profiles/admin-controls.html">Admin Controls</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../cloud-runtimes/example/index.html"> Example</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../security/index.html"> 安全</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../security/client-authentication.html">Client Authentication</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../security/cdap-authentication-clients-java.html">CDAP Authentication Client for Java</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../security/cdap-authentication-clients-python.html">CDAP Authentication Client for Python</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../security/custom-authentication.html">Custom Authentication</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../security/authorization-extensions.html">Authorization Extensions</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../testing/index.html"> 测试和调试</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../testing/testing.html"> Testing a CDAP Application</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../testing/debugging.html"> Debugging</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../testing/troubleshooting.html"> Troubleshooting</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../ingesting-tools/index.html"> 数据融合</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../ingesting-tools/cdap-stream-clients-java.html">CDAP Stream Client for Java</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../ingesting-tools/cdap-stream-clients-python.html">CDAP Stream Client for Python</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../ingesting-tools/cdap-stream-clients-ruby.html">CDAP Stream Client for Ruby</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../ingesting-tools/cdap-flume.html">CDAP Flume</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../data-exploration/index.html"> 数据探索</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../data-exploration/filesets.html"> Fileset Exploration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../data-exploration/tables.html"> Table Exploration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../data-exploration/object-mapped-tables.html"> ObjectMappedTable Exploration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../data-exploration/custom-datasets.html"> Custom Dataset Exploration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../data-exploration/hive-execution-engines.html"> Hive Execution Engines</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../advanced/index.html"> 高级主题</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../advanced/application-logback.html"> Application Logback</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../advanced/best-practices.html"> Best Practices</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../advanced/class-loading.html"> Class Loading</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../advanced/configuring-resources.html"> Configuring Program Resources</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../advanced/program-retry-policies.html"> Program Retry Policies</a></li>
</ul>
</li>
</ul>
</nav>
      </li>
      <li class="toctree-l1"><a href="../../table-of-contents/../../admin-manual/index.html" rel="nofollow">管理手册</a>
      </li>
      <li class="toctree-l1"><a href="../../table-of-contents/../../integrations/index.html" rel="nofollow">集成手册</a>
      </li>
      <li class="toctree-l1"><a href="../../table-of-contents/../../examples-manual/index.html" rel="nofollow">最佳实践</a>
      </li>
    </ul>
  </div></div>
    </div><div class="col-md-8 content" id="main-content">
    
  <div class="section" id="system-and-custom-datasets">
<h1>System and Custom Datasets<a class="headerlink" href="#system-and-custom-datasets" title="Permalink to this headline">🔗</a></h1>
<div class="section" id="system-datasets">
<span id="id1"></span><h2>System Datasets<a class="headerlink" href="#system-datasets" title="Permalink to this headline">🔗</a></h2>
<p>The Cask Data Application Platform comes with several system-defined datasets, including but not limited to
key/value Tables, indexed Tables and time series. Each of them is defined with the help of one or more embedded
Tables, but defines its own interface. Examples include:</p>
<ul class="simple">
<li>The <code class="docutils literal notranslate"><span class="pre">KeyValueTable</span></code> implements a key/value store as a Table with a single column.</li>
<li>The <code class="docutils literal notranslate"><span class="pre">IndexedTable</span></code> implements a Table with a secondary key using two embedded Tables,
one for the data and one for the secondary index.</li>
<li>The <code class="docutils literal notranslate"><span class="pre">TimeseriesTable</span></code> uses a Table to store keyed data over time
and allows querying that data over ranges of time.</li>
<li>The <code class="docutils literal notranslate"><span class="pre">ObjectMappedTable</span></code> uses a Table to store Java Objects by mapping object fields to
table columns. It can be explored through the use of ad-hoc SQL-like queries as described
in <a class="reference internal" href="../../data-exploration/object-mapped-tables.html#object-mapped-table-exploration"><span class="std std-ref">ObjectMappedTable Exploration</span></a>.</li>
</ul>
<p>See the <span class="xref std std-ref">Javadocs</span> for these classes. Any class in the CDAP libraries
that implements the <code class="docutils literal notranslate"><span class="pre">Dataset</span></code> interface is a system dataset.</p>
</div>
<div class="section" id="custom-datasets">
<span id="id2"></span><h2>Custom Datasets<a class="headerlink" href="#custom-datasets" title="Permalink to this headline">🔗</a></h2>
<p>You can define your own dataset classes to implement common data patterns specific to your code.</p>
<p>Suppose you want to define a counter table that, in addition to counting words,
counts how many unique words it has seen. The dataset can be built on top of two underlying datasets: the first is a
Table (<code class="docutils literal notranslate"><span class="pre">entryCountTable</span></code>) that counts all the words, and the second is a Table (<code class="docutils literal notranslate"><span class="pre">uniqueCountTable</span></code>) that holds the unique count.</p>
<p>When your custom dataset is built on top of one or more existing datasets, the simplest way to implement
it is to define just the data operations (by implementing the dataset interface) and delegate all other
work (such as administrative operations) to the embedded datasets.</p>
<p>To do this, you need to implement the dataset class and define the embedded datasets by annotating
its constructor arguments.</p>
<p>In this case, our <code class="docutils literal notranslate"><span class="pre">UniqueCountTable</span></code> will have two underlying datasets:
an <code class="docutils literal notranslate"><span class="pre">entryCountTable</span></code> and a <code class="docutils literal notranslate"><span class="pre">uniqueCountTable</span></code>, both of type <code class="docutils literal notranslate"><span class="pre">Table</span></code>:</p>
<!-- Java sample: fields and constructor of UniqueCountTable. The listing ends
     after the constructor without closing the class; the methods shown in the
     listings that follow presumably belong inside this class body. -->
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kd">public</span> <span class="kd">class</span> <span class="nc">UniqueCountTable</span> <span class="kd">extends</span> <span class="n">AbstractDataset</span> <span class="p">{</span>

  <span class="kd">private</span> <span class="kd">final</span> <span class="n">Table</span> <span class="n">entryCountTable</span><span class="p">;</span>
  <span class="kd">private</span> <span class="kd">final</span> <span class="n">Table</span> <span class="n">uniqueCountTable</span><span class="p">;</span>

  <span class="kd">public</span> <span class="nf">UniqueCountTable</span><span class="p">(</span><span class="n">DatasetSpecification</span> <span class="n">spec</span><span class="p">,</span>
                          <span class="nd">@EmbeddedDataset</span><span class="p">(</span><span class="s">&quot;entryCountTable&quot;</span><span class="p">)</span> <span class="n">Table</span> <span class="n">entryCountTable</span><span class="p">,</span>
                          <span class="nd">@EmbeddedDataset</span><span class="p">(</span><span class="s">&quot;uniqueCountTable&quot;</span><span class="p">)</span> <span class="n">Table</span> <span class="n">uniqueCountTable</span><span class="p">)</span> <span class="p">{</span>
    <span class="kd">super</span><span class="p">(</span><span class="n">spec</span><span class="p">.</span><span class="na">getName</span><span class="p">(),</span> <span class="n">entryCountTable</span><span class="p">,</span> <span class="n">uniqueCountTable</span><span class="p">);</span>
    <span class="k">this</span><span class="p">.</span><span class="na">entryCountTable</span> <span class="o">=</span> <span class="n">entryCountTable</span><span class="p">;</span>
    <span class="k">this</span><span class="p">.</span><span class="na">uniqueCountTable</span> <span class="o">=</span> <span class="n">uniqueCountTable</span><span class="p">;</span>
  <span class="p">}</span>
</pre></div>
</div>
<p>In this case, the class must have one constructor that takes a <code class="docutils literal notranslate"><span class="pre">DatasetSpecification</span></code> as a first
parameter and any number of <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s annotated with the <code class="docutils literal notranslate"><span class="pre">&#64;EmbeddedDataset</span></code> annotation as the
remaining parameters. <code class="docutils literal notranslate"><span class="pre">&#64;EmbeddedDataset</span></code> takes the embedded dataset’s name as a parameter.</p>
<p>The <code class="docutils literal notranslate"><span class="pre">UniqueCountTable</span></code> stores a counter for each word in its own row of the entry count table.
For each word the counter is incremented. If the result of the increment is 1, then this is the first time
we’ve encountered that word, hence we have a new unique word and we then increment the unique counter:</p>
<!-- Java sample: updateUniqueCount increments the per-entry "count" column by 1L
     and, when the new value is 1, increments the "unique_count" counter.
     The counter is incremented with a long, so it is read back with getLong. -->
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kd">public</span> <span class="kt">void</span> <span class="nf">updateUniqueCount</span><span class="p">(</span><span class="n">String</span> <span class="n">entry</span><span class="p">)</span> <span class="p">{</span>
  <span class="kt">long</span> <span class="n">newCount</span> <span class="o">=</span> <span class="n">entryCountTable</span><span class="p">.</span><span class="na">incrementAndGet</span><span class="p">(</span><span class="k">new</span> <span class="n">Increment</span><span class="p">(</span><span class="n">entry</span><span class="p">,</span> <span class="s">&quot;count&quot;</span><span class="p">,</span> <span class="mi">1L</span><span class="p">)).</span><span class="na">getLong</span><span class="p">(</span><span class="s">&quot;count&quot;</span><span class="p">);</span>
  <span class="k">if</span> <span class="p">(</span><span class="n">newCount</span> <span class="o">==</span> <span class="mi">1L</span><span class="p">)</span> <span class="p">{</span>
    <span class="n">uniqueCountTable</span><span class="p">.</span><span class="na">increment</span><span class="p">(</span><span class="k">new</span> <span class="n">Increment</span><span class="p">(</span><span class="s">&quot;unique_count&quot;</span><span class="p">,</span> <span class="s">&quot;count&quot;</span><span class="p">,</span> <span class="mi">1L</span><span class="p">));</span>
  <span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
<p>Finally, we write a method to retrieve the number of unique words seen:</p>
<!-- Java sample: readUniqueCount issues a Get for ("unique_count", "count") on
     uniqueCountTable and returns the result's getLong("count"). -->
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kd">public</span> <span class="n">Long</span> <span class="nf">readUniqueCount</span><span class="p">()</span> <span class="p">{</span>
  <span class="k">return</span> <span class="n">uniqueCountTable</span><span class="p">.</span><span class="na">get</span><span class="p">(</span><span class="k">new</span> <span class="n">Get</span><span class="p">(</span><span class="s">&quot;unique_count&quot;</span><span class="p">,</span> <span class="s">&quot;count&quot;</span><span class="p">)).</span><span class="na">getLong</span><span class="p">(</span><span class="s">&quot;count&quot;</span><span class="p">);</span>
<span class="p">}</span>
</pre></div>
</div>
<p>All administrative operations (such as create, drop, truncate) will be delegated to the embedded datasets
in the order they are defined in the constructor. <code class="docutils literal notranslate"><span class="pre">DatasetProperties</span></code> that are passed during creation of
the dataset will be passed as-is to the embedded datasets.</p>
<p>To create a dataset of type <code class="docutils literal notranslate"><span class="pre">UniqueCountTable</span></code>, add the following into the application implementation:</p>
<!-- Java sample: MyApp.configure() creates a dataset named "myCounters" of type
     UniqueCountTable. The "..." stands for the rest of the configuration. -->
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kd">public</span> <span class="kd">class</span> <span class="nc">MyApp</span> <span class="kd">extends</span> <span class="n">AbstractApplication</span> <span class="p">{</span>
  <span class="kd">public</span> <span class="kt">void</span> <span class="nf">configure</span><span class="p">()</span> <span class="p">{</span>
    <span class="n">createDataset</span><span class="p">(</span><span class="s">&quot;myCounters&quot;</span><span class="p">,</span> <span class="n">UniqueCountTable</span><span class="p">.</span><span class="na">class</span><span class="p">);</span>
    <span class="p">...</span>
  <span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
<div class="section" id="passing-properties">
<span id="custom-datasets-properties"></span><h3>Passing Properties<a class="headerlink" href="#passing-properties" title="Permalink to this headline">🔗</a></h3>
<p>You can also pass <code class="docutils literal notranslate"><span class="pre">DatasetProperties</span></code> as a third parameter to the <code class="docutils literal notranslate"><span class="pre">createDataset</span></code> method.
These properties will be used by embedded datasets during creation and will be available via the
<code class="docutils literal notranslate"><span class="pre">DatasetSpecification</span></code> passed to the dataset constructor. For example, to create a dataset with
a TTL (time-to-live, specified in seconds) property, you can use:</p>
<!-- Java sample: createDataset called with a DatasetProperties third argument
     that sets Table.PROPERTY_TTL to "3600". -->
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="n">createDataset</span><span class="p">(</span><span class="s">&quot;frequentCustomers&quot;</span><span class="p">,</span> <span class="n">KeyValueTable</span><span class="p">.</span><span class="na">class</span><span class="p">,</span>
  <span class="n">DatasetProperties</span><span class="p">.</span><span class="na">builder</span><span class="p">()</span>
                   <span class="p">.</span><span class="na">add</span><span class="p">(</span><span class="n">Table</span><span class="p">.</span><span class="na">PROPERTY_TTL</span><span class="p">,</span> <span class="s">&quot;3600&quot;</span><span class="p">)</span>
                   <span class="p">.</span><span class="na">build</span><span class="p">());</span>
</pre></div>
</div>
<p>You can pass other properties, such as for
<a class="reference internal" href="../transaction-system.html#transaction-system-conflict-detection"><span class="std std-ref">conflict detection</span></a> and for
<a class="reference internal" href="table.html#table-datasets-pre-splitting"><span class="std std-ref">pre-splitting into multiple regions</span></a>.</p>
</div>
<div class="section" id="accessing-a-dataset">
<span id="custom-datasets-accessing-datasets"></span><h3>Accessing a Dataset<a class="headerlink" href="#accessing-a-dataset" title="Permalink to this headline">🔗</a></h3>
<p>Application components can access a custom dataset in the same way as all other datasets:
via either the <code class="docutils literal notranslate"><span class="pre">&#64;UseDataSet</span></code> annotation, or the <code class="docutils literal notranslate"><span class="pre">getDataset()</span></code> method of the program context.
This is described in more detail in the section on
<a class="reference internal" href="overview.html#datasets-in-programs"><span class="std std-ref">Using Datasets in Programs</span></a>.</p>
<p>You can also create, drop, and truncate datasets using the <span class="xref std std-ref">Dataset HTTP RESTful API</span>.</p>
</div>
<div class="section" id="annotating-dataset-methods">
<span id="custom-datasets-access-annotations"></span><h3>Annotating Dataset Methods<a class="headerlink" href="#annotating-dataset-methods" title="Permalink to this headline">🔗</a></h3>
<p>Dataset methods can be annotated with the type of access that they perform on data.
Annotations help the CDAP runtime to enforce <span class="xref std std-ref">authorization</span>,
as well as track <a class="reference internal" href="../../metadata/discovery-lineage.html#metadata-lineage"><span class="std std-ref">lineage</span></a>. Dataset methods (including
constructors) can be annotated with one of:</p>
<ul class="simple">
<li><code class="docutils literal notranslate"><span class="pre">&#64;ReadOnly</span></code>: Denotes that a method or constructor performs only <strong>read</strong> operations</li>
<li><code class="docutils literal notranslate"><span class="pre">&#64;WriteOnly</span></code>: Denotes that a method or constructor performs only <strong>write</strong> operations</li>
<li><code class="docutils literal notranslate"><span class="pre">&#64;ReadWrite</span></code>: Denotes that a method or constructor performs both <strong>read</strong> and <strong>write</strong> operations</li>
</ul>
<p>Methods in <a class="reference internal" href="#system-datasets"><span class="std std-ref">System Datasets</span></a> already contain appropriate
annotations. For <a class="reference internal" href="#custom-datasets"><span class="std std-ref">Custom Datasets</span></a>, it is the responsibility of the
developer to appropriately annotate methods.</p>
</div>
</div>
</div>

</div>
    <div class="col-md-2">
      <div id="right-sidebar" class="bs-sidenav scrollable-y" role="complementary">
        <div id="localtoc-scrollspy">
        </div>
      </div>
    </div></div>
</div>
<!-- block main content end -->
<!-- block footer -->
<footer class="footer">
      <div class="container">
        <div class="row">
          <div class="col-md-2 footer-left"><a title="TimePartitioned FileSet" href="time-partitioned-fileset.html">Previous</a></div>
          <div class="col-md-8 footer-center"><a class="footer-tab-link" href="../../table-of-contents/../../reference-manual/licenses/index.html">Copyright</a> &copy; 2014-2020 Cask Data, Inc. &bull; <a class="footer-tab-link" href="//docs.cask.co/cdap/6.1.1/cdap-docs-6.1.1-web.zip" rel="nofollow">Download</a> an archive or
<a class="footer-tab-link" href="//docs.cask.co/cdap">switch the version</a> of the documentation
          </div>
          <div class="col-md-2 footer-right"><a title="Dataset Permissions" href="permissions.html">Next</a></div>
        </div>
      </div>
    </footer>
<!-- block footer end -->
<script type="text/javascript" src="../../_static/bootstrap-3.3.6/js/bootstrap.min.js"></script><script type="text/javascript" src="../../_static/js/bootstrap-sphinx.js"></script><script type="text/javascript" src="../../_static/js/abixTreeList-2.js"></script><script type="text/javascript" src="../../_static/js/cdap-dynamicscrollspy-4.js"></script><script type="text/javascript" src="../../_static/js/cdap-version-menu.js"></script><script type="text/javascript" src="../../_static/js/copy-to-clipboard.js"></script><script type="text/javascript" src="../../_static/js/jquery.mousewheel.min.js"></script><script type="text/javascript" src="../../_static/js/jquery.mCustomScrollbar.js"></script><script type="text/javascript" src="../../_static/js/js.cookie.js"></script><script type="text/javascript" src="../../_static/js/tabbed-parsed-literal-0.2.js"></script><script type="text/javascript" src="../../_static/js/cdap-onload-javascript.js"></script><script type="text/javascript" src="../../_static/js/cdap-version-menu.js"></script>
    <script src="https://cdap.gitee.io/docs/cdap/json-versions.js"></script>
  </body>
</html>